[DirectX] Add support for Raw Buffer Loads and Stores for scalars and vectors of doubles and i64s in SM6.2 and earlier #146627
Conversation
@llvm/pr-subscribers-backend-directx

Author: Sarah Spall (spall)

Changes

For SM6.2 and earlier, raw buffer loads and stores can't handle 64-bit types. This PR expands raw buffer loads and stores of doubles and i64s (scalar and vector) into 32-bit operations, building on the work done in #139996 and #145047.

Patch is 52.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146627.diff

9 Files Affected:
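The shape of the expansion, as a sketch for a scalar double load (mirroring the new RawBufferLoadDouble.ll checks in the diff below; value names are illustrative):

; before: a direct 64-bit raw-buffer load, which SM6.2 and earlier cannot represent
%ld = call { double, i1 } @llvm.dx.resource.load.rawbuffer(
    target("dx.RawBuffer", double, 0, 0) %buffer, i32 %index, i32 0)

; after: load two i32s and reassemble the double with dx.asdouble
%l  = call { <2 x i32>, i1 } @llvm.dx.resource.load.rawbuffer.v2i32(
    target("dx.RawBuffer", double, 0, 0) %buffer, i32 %index, i32 0)
%v  = extractvalue { <2 x i32>, i1 } %l, 0
%lo = extractelement <2 x i32> %v, i32 0
%hi = extractelement <2 x i32> %v, i32 1
%d  = call double @llvm.dx.asdouble.i32(i32 %lo, i32 %hi)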
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 435b80ecaec64..0770f03572d5a 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -71,12 +71,23 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
+ case Intrinsic::dx_resource_load_rawbuffer:
+ if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
+ return false;
+ // fallthrough to check if double or i64
+ LLVM_FALLTHROUGH;
case Intrinsic::dx_resource_load_typedbuffer: {
// We need to handle i64, doubles, and vectors of them.
Type *ScalarTy =
F.getReturnType()->getStructElementType(0)->getScalarType();
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
}
+ case Intrinsic::dx_resource_store_rawbuffer: {
+ if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
+ return false;
+ Type *ScalarTy = F.getFunctionType()->getParamType(3)->getScalarType();
+ return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
+ }
case Intrinsic::dx_resource_store_typedbuffer: {
// We need to handle i64 and doubles and vectors of i64 and doubles.
Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType();
@@ -544,7 +555,7 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
return Builder.CreateFMul(X, PiOver180);
}
-static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
+static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getType()->getStructElementType(0);
@@ -552,55 +563,73 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
bool IsDouble = ScalarTy->isDoubleTy();
assert(IsDouble || ScalarTy->isIntegerTy(64) &&
"Only expand double or int64 scalars or vectors");
+ bool IsVector = isa<FixedVectorType>(BufferTy);
unsigned ExtractNum = 2;
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
- assert(VT->getNumElements() == 2 &&
- "TypedBufferLoad vector must be size 2");
- ExtractNum = 4;
+ if (!IsRaw)
+ assert(VT->getNumElements() == 2 &&
+ "TypedBufferLoad vector must be size 2");
+ ExtractNum = 2 * VT->getNumElements();
}
- Type *Ty = VectorType::get(Builder.getInt32Ty(), ExtractNum, false);
-
- Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
- CallInst *Load =
- Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1)});
-
- // extract the buffer load's result
- Value *Extract = Builder.CreateExtractValue(Load, {0});
-
- SmallVector<Value *> ExtractElements;
- for (unsigned I = 0; I < ExtractNum; ++I)
- ExtractElements.push_back(
- Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
-
- // combine into double(s) or int64(s)
+ SmallVector<Value *, 2> Loads;
Value *Result = PoisonValue::get(BufferTy);
- for (unsigned I = 0; I < ExtractNum; I += 2) {
- Value *Combined = nullptr;
- if (IsDouble)
- // For doubles, use dx_asdouble intrinsic
- Combined =
- Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
- {ExtractElements[I], ExtractElements[I + 1]});
- else {
- // For int64, manually combine two int32s
- // First, zero-extend both values to i64
- Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
- Value *Hi =
- Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
- // Shift the high bits left by 32 bits
- Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
- // OR the high and low bits together
- Combined = Builder.CreateOr(Lo, ShiftedHi);
+ unsigned Base = 0;
+ while (ExtractNum > 0) {
+ unsigned LoadNum = std::min(ExtractNum, 4u);
+ Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false);
+
+ Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
+ Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
+ SmallVector<Value *, 3> Args = {Orig->getOperand(0), Orig->getOperand(1)};
+ if (IsRaw) {
+ LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
+ Value *Tmp = Builder.getInt32(4 * Base * 2);
+ Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
}
- if (ExtractNum == 4)
- Result = Builder.CreateInsertElement(Result, Combined,
- Builder.getInt32(I / 2));
- else
- Result = Combined;
+ CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
+ Loads.push_back(Load);
+
+ // extract the buffer load's result
+ Value *Extract = Builder.CreateExtractValue(Load, {0});
+
+ SmallVector<Value *> ExtractElements;
+ for (unsigned I = 0; I < LoadNum; ++I)
+ ExtractElements.push_back(
+ Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
+
+ // combine into double(s) or int64(s)
+ for (unsigned I = 0; I < LoadNum; I += 2) {
+ Value *Combined = nullptr;
+ if (IsDouble)
+ // For doubles, use dx_asdouble intrinsic
+ Combined = Builder.CreateIntrinsic(
+ Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+ {ExtractElements[I], ExtractElements[I + 1]});
+ else {
+ // For int64, manually combine two int32s
+ // First, zero-extend both values to i64
+ Value *Lo =
+ Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
+ Value *Hi =
+ Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
+ // Shift the high bits left by 32 bits
+ Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
+ // OR the high and low bits together
+ Combined = Builder.CreateOr(Lo, ShiftedHi);
+ }
+
+ if (IsVector)
+ Result = Builder.CreateInsertElement(Result, Combined,
+ Builder.getInt32((I / 2) + Base));
+ else
+ Result = Combined;
+ }
+
+ ExtractNum -= LoadNum;
+ Base += LoadNum / 2;
}
Value *CheckBit = nullptr;
@@ -620,8 +649,12 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
} else {
// Use of the check bit
assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
- if (!CheckBit)
- CheckBit = Builder.CreateExtractValue(Load, {1});
+ if (!CheckBit) {
+ SmallVector<Value *, 2> CheckBits;
+ for (Value *L : Loads)
+ CheckBits.push_back(Builder.CreateExtractValue(L, {1}));
+ CheckBit = Builder.CreateAnd(CheckBits);
+ }
EVI->replaceAllUsesWith(CheckBit);
}
EVI->eraseFromParent();
@@ -630,10 +663,10 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
return true;
}
-static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
+static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
IRBuilder<> Builder(Orig);
- Type *BufferTy = Orig->getFunctionType()->getParamType(2);
+ Type *BufferTy = Orig->getFunctionType()->getParamType(IsRaw ? 3 : 2);
Type *ScalarTy = BufferTy->getScalarType();
bool IsDouble = ScalarTy->isDoubleTy();
assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
@@ -641,19 +674,24 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
// Determine if we're dealing with a vector or scalar
bool IsVector = isa<FixedVectorType>(BufferTy);
- if (IsVector) {
- assert(cast<FixedVectorType>(BufferTy)->getNumElements() == 2 &&
- "TypedBufferStore vector must be size 2");
+ unsigned ExtractNum = 2;
+ unsigned VecLen = 0;
+ if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
+ if (!IsRaw)
+ assert(VT->getNumElements() == 2 &&
+ "TypedBufferStore vector must be size 2");
+ VecLen = VT->getNumElements();
+ ExtractNum = VecLen * 2;
}
// Create the appropriate vector type for the result
Type *Int32Ty = Builder.getInt32Ty();
- Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
+ Type *ResultTy = VectorType::get(Int32Ty, ExtractNum, false);
Value *Val = PoisonValue::get(ResultTy);
Type *SplitElementTy = Int32Ty;
if (IsVector)
- SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+ SplitElementTy = VectorType::get(SplitElementTy, VecLen, false);
Value *LowBits = nullptr;
Value *HighBits = nullptr;
@@ -661,15 +699,16 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
if (IsDouble) {
auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
- {Orig->getOperand(2)});
+ {Orig->getOperand(IsRaw ? 3 : 2)});
LowBits = Builder.CreateExtractValue(Split, 0);
HighBits = Builder.CreateExtractValue(Split, 1);
} else {
// Handle int64 type(s)
- Value *InputVal = Orig->getOperand(2);
+ Value *InputVal = Orig->getOperand(IsRaw ? 3 : 2);
Constant *ShiftAmt = Builder.getInt64(32);
if (IsVector)
- ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt);
+ ShiftAmt =
+ ConstantVector::getSplat(ElementCount::getFixed(VecLen), ShiftAmt);
// Split into low and high 32-bit parts
LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
@@ -678,17 +717,42 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
}
if (IsVector) {
- Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+ SmallVector<int, 8> Mask;
+ for (unsigned I = 0; I < VecLen; ++I) {
+ Mask.push_back(I);
+ Mask.push_back(I + VecLen);
+ }
+ Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask);
} else {
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
}
- // Create the final intrinsic call
- Builder.CreateIntrinsic(Builder.getVoidTy(),
- Intrinsic::dx_resource_store_typedbuffer,
- {Orig->getOperand(0), Orig->getOperand(1), Val});
+ unsigned Base = 0;
+ while (ExtractNum > 0) {
+ unsigned StoreNum = std::min(ExtractNum, 4u);
+
+ Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
+ SmallVector<Value *, 4> Args = {Orig->getOperand(0), Orig->getOperand(1)};
+ if (IsRaw) {
+ StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
+ Value *Tmp = Builder.getInt32(4 * Base);
+ Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
+ }
+
+ SmallVector<int, 4> Mask;
+ for (unsigned I = 0; I < StoreNum; ++I) {
+ Mask.push_back(Base + I);
+ }
+ Value *SubVal = Builder.CreateShuffleVector(Val, Mask);
+
+ Args.push_back(SubVal);
+ // Create the final intrinsic call
+ Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);
+ ExtractNum -= StoreNum;
+ Base += StoreNum;
+ }
Orig->eraseFromParent();
return true;
}
@@ -821,12 +885,20 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_radians:
Result = expandRadiansIntrinsic(Orig);
break;
+ case Intrinsic::dx_resource_load_rawbuffer:
+ if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true))
+ return true;
+ break;
+ case Intrinsic::dx_resource_store_rawbuffer:
+ if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ true))
+ return true;
+ break;
case Intrinsic::dx_resource_load_typedbuffer:
- if (expandTypedBufferLoadIntrinsic(Orig))
+ if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ false))
return true;
break;
case Intrinsic::dx_resource_store_typedbuffer:
- if (expandTypedBufferStoreIntrinsic(Orig))
+ if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ false))
return true;
break;
case Intrinsic::usub_sat:
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
index 9c3dab0cc1e46..560bb56d34d45 100644
--- a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -16,8 +16,10 @@ define void @storef64(double %0) {
; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1
+ ; this shufflevector is unnecessary but generated to avoid specialization
+ ; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
- ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
+ ; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec3]])
call void @llvm.dx.resource.store.typedbuffer(
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
double %0)
@@ -38,8 +40,10 @@ define void @storev2f64(<2 x double> %0) {
; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ ; this shufflevector is unnecessary but generated to avoid specialization
+ ; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
- ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
+ ; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec2]])
call void @llvm.dx.resource.store.typedbuffer(
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
<2 x double> %0)
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
index c97a02d1873a0..31031804a0e8b 100644
--- a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
@@ -12,7 +12,9 @@ define void @storei64(i64 %0) {
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1
-; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP6]])
+; the shufflevector is unnecessary but generated to avoid too much specialization
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP7]])
; CHECK-NEXT: ret void
;
%buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
@@ -29,7 +31,9 @@ define void @storev2i64(<2 x i64> %0) {
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP0]], splat (i64 32)
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP13]])
+; the shufflevector is unnecessary but generated to avoid too much specialization
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP14]])
; CHECK-NEXT: ret void
;
%buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll
deleted file mode 100644
index d8b6311c8ff2e..0000000000000
--- a/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; We use llc for this test so that we don't abort after the first error.
-; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
-
-target triple = "dxil-pc-shadermodel6.2-compute"
-
-declare void @v4f64_user(<4 x double>)
-
-; Can't load 64 bit types directly until SM6.3 (byteaddressbuf.Load<int64_t4>)
-; CHECK: error:
-; CHECK-SAME: in function loadv4f64_byte
-; CHECK-SAME: Cannot create RawBufferLoad operation: Invalid overload type
-define void @loadv4f64_byte(i32 %offset) "hlsl.export" {
- %buffer = call target("dx.RawBuffer", i8, 0, 0, 0)
- @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0(
- i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
-
- %load = call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer.v4i64(
- target("dx.RawBuffer", i8, 0, 0, 0) %buffer, i32 %offset, i32 0)
- %data = extractvalue {<4 x double>, i1} %load, 0
-
- call void @v4f64_user(<4 x double> %data)
-
- ret void
-}
diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll
new file mode 100644
index 0000000000000..dc0c19dad9e06
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/RawBufferLoadDouble.ll
@@ -0,0 +1,199 @@
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.2-compute"
+
+define void @loadf64(i32 %index) {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", double, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+ %buffer = call target("dx.Rawbuffer", double, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+ ; check we load an <2 x i32> instead of a double
+ ; CHECK-NOT: call {double, i1} @llvm.dx.resource.load.rawbuffer
+ ; CHECK: [[L0:%.*]] = call { <2 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v2i32.tdx.Rawbuffer_f64_0_0t(
+ ; CHECK-SAME: target("dx.Rawbuffer", double, 0, 0) [[B]], i32 %index, i32 0)
+ %load0 = call {double, i1} @llvm.dx.resource.load.rawbuffer(
+ target("dx.Rawbuffer", double, 0, 0) %buffer, i32 %index, i32 0)
+
+ ; check we extract the two i32 and construct a double
+ ; CHECK: [[D0:%.*]] = extractvalue { <2 x i32>, i1 } [[L0]], 0
+ ; CHECK: [[Lo:%.*]] = extractelement <2 x i32> [[D0]], i32 0
+ ; CHECK: [[Hi:%.*]] = extractelement <2 x i32> [[D0]], i32 1
+ ; CHECK: [[DBL:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[Lo]], i32 [[Hi]])
+ ; CHECK-NOT: extractvalue { double, i1 }
+ %data0 = extractvalue {double, i1} %load0, 0
+ ret void
+}
+
+define void @loadv2f64(i32 %index) {
+ ; check the handle from binding is unchanged
+ ; CHECK: [[B:%.*]] = call target("dx.Rawbuffer", <2 x double>, 0, 0)
+ ; CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2f64_0_0t(
+ ; CHECK-SAME: i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+ %buffer = call target("dx.Rawbuffer", <2 x double>, 0, 0)
+ @llvm.dx.resource.handlefrombinding.tdx.Rawbuffer_v2f64_1_0_0t(
+ i32 0, i32 1, i32 1, i32 0, i1 false, ptr null)
+
+ ; check we load an <4 x i32> instead of a double2
+ ; CHECK: [[L0:%.*]] = call { <4 x i32>, i1 }
+ ; CHECK-SAME: @llvm.dx.resource.load.rawbuffer.v4i32.tdx.Rawbuffer_v2f64_0_0t(
+ ; CHECK-SAME: target("dx.Rawbuffer", <2 x double>, 0, 0) [[B]], i32 %index, i32 0)
+ %load0 = cal...
[truncated]
We need typedbuffer test cases with two run lines: one where we specify sm6.2 in the target triple and one where we specify sm6.3. That way we have a case that shows the backport support and the difference in behavior between the two shader models.
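For example (a sketch; the check prefixes are invented, and this assumes opt's -mtriple flag overrides the module's triple):

; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.2-compute %s | FileCheck %s --check-prefixes=CHECK,SM62
; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s --check-prefixes=CHECK,SM63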
This PR is only meant to address RawBuffers; do you want a test showing this doesn't regress the behavior for typedbuffers in SM6.3?
The Store tests were updated because of a newly generated shufflevector, but the Load case didn't generate any new instructions in any configuration.
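That is, each scalar store now goes through a no-op identity shuffle before the store, e.g. (taken from the updated checks):

%sub = shufflevector <2 x i32> %val, <2 x i32> poison, <2 x i32> <i32 0, i32 1>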
if (IsVector)
  Result = Builder.CreateInsertElement(Result, Combined,
                                       Builder.getInt32((I / 2) + Base));
Should this be something like ExtractNum > 2 rather than IsVector? For odd-length vectors I'd expect the last value to be scalar.
For an odd-length vector of length 3, we still need our Result to be a vector, and ExtractNum changes during the loop, so ExtractNum would be 2 when we're loading the third value of a vec3.
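Concretely, a sketch of what the loop emits for a <3 x double> load (names invented; byte offsets follow the patch's 4 * Base * 2 computation): ExtractNum starts at 6, so the first iteration loads four i32s and the second loads the remaining two, 16 bytes further on.

; first chunk: LoadNum = min(6, 4) = 4, Base = 0, so no extra offset
%l0 = call { <4 x i32>, i1 } @llvm.dx.resource.load.rawbuffer.v4i32(
    target("dx.RawBuffer", <3 x double>, 0, 0) %buf, i32 %idx, i32 %off)
; second chunk: LoadNum = 2, Base = 2, extra offset = 4 * 2 * 2 = 16 bytes
%off2 = add i32 %off, 16
%l1 = call { <2 x i32>, i1 } @llvm.dx.resource.load.rawbuffer.v2i32(
    target("dx.RawBuffer", <3 x double>, 0, 0) %buf, i32 %idx, i32 %off2)
; the six i32s are then recombined pairwise into three doubles via dx.asdouble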
If you meant to test that RawBuffer Load and Store work in SM6.3 and later, there are existing tests in RawBufferStore.ll and RawBufferLoad.ll.
Not exactly. We have some work to do to document all the backporting work. So what I'd like is test cases with separate run lines for the different shader models, for every case where we make an active decision to behave differently based on shader model, as this PR does.
Code looks good. We should probably discuss how to test and document cases where we change behavior based on shader model, since it is something we will need to work on in the near future.
For SM6.2 and earlier, raw buffer loads and stores can't handle 64-bit types. This PR expands raw buffer loads and stores for the 64-bit types double and int64_t, adding to the work done in #139996 and #145047.
Raw buffer loads and stores allow 64-bit vectors of size 3 and 4, and the code is modified to handle those lengths as well.
Closes #144747
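For reference, a sketch of the store-side expansion for a <3 x i64> value %v (hypothetical names; byte offsets follow the patch's 4 * Base computation, and the interleave mask matches the Mask loop in the patch):

; split %v into low and high 32-bit halves and interleave them
%lo  = trunc <3 x i64> %v to <3 x i32>
%shr = lshr <3 x i64> %v, splat (i64 32)
%hi  = trunc <3 x i64> %shr to <3 x i32>
%all = shufflevector <3 x i32> %lo, <3 x i32> %hi, <6 x i32> <i32 0, i32 3, i32 1, i32 4, i32 2, i32 5>
; first chunk: four i32s at the original byte offset
%c0 = shufflevector <6 x i32> %all, <6 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.dx.resource.store.rawbuffer(
    target("dx.RawBuffer", <3 x i64>, 1, 0) %buf, i32 %idx, i32 %off, <4 x i32> %c0)
; second chunk: the remaining two i32s, 4 * Base = 16 bytes further on
%off2 = add i32 %off, 16
%c1 = shufflevector <6 x i32> %all, <6 x i32> poison, <2 x i32> <i32 4, i32 5>
call void @llvm.dx.resource.store.rawbuffer(
    target("dx.RawBuffer", <3 x i64>, 1, 0) %buf, i32 %idx, i32 %off2, <2 x i32> %c1)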